Module 1

1.Importing Libraries

library(pacman)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
library(gmodels)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

2.Importing dataset

property_dataset <- read.csv("fy2023-property-assessment-data.csv")

a) Creating a subset of interesting variables

  • We’ve used 16 variables out of the total 60 which are - CITY,ZIP_CODE,LU,LAND_SF,LAND_VALUE,TOTAL_VALUE,YR_BUILT,YR_REMODEL,ROOF_STRUCTURE,BED_RMS,FULL_BTH,KITCHENS,TT_RMS,HEAT_TYPE,AC_TYPE and PROP_VIEW.
property_subset <- subset(property_dataset,select = c(CITY,ZIP_CODE,LU,LAND_SF,LAND_VALUE,TOTAL_VALUE,YR_BUILT,YR_REMODEL,ROOF_STRUCTURE,BED_RMS,FULL_BTH,KITCHENS,TT_RMS,HEAT_TYPE,AC_TYPE,PROP_VIEW))
head(property_subset,10)
##           CITY ZIP_CODE LU LAND_SF LAND_VALUE TOTAL_VALUE YR_BUILT YR_REMODEL
## 1  EAST BOSTON     2128 R3    1150     195700      784200     1900         NA
## 2  EAST BOSTON     2128 R3    1150     196500      811800     1920       2000
## 3  EAST BOSTON     2128 R3    1150     197100      796300     1905       1985
## 4  EAST BOSTON     2128 R3    1150     197700      727900     1900       1991
## 5  EAST BOSTON     2128 R2    2010     225700      794800     1900       1978
## 6  EAST BOSTON     2128 R3    2500     261200     1288300     1900       2018
## 7  EAST BOSTON     2128 R3    2500     262000     1255200     1900       2009
## 8  EAST BOSTON     2128 R3    2500     262700     1139200     1900         NA
## 9  EAST BOSTON     2128 R3    2500     263200      876300     1900       1998
## 10 EAST BOSTON     2128 R3    2500     224400     1027200     1900       2020
##    ROOF_STRUCTURE BED_RMS FULL_BTH KITCHENS TT_RMS          HEAT_TYPE
## 1        F - Flat       6        3        3     12 W - Ht Water/Steam
## 2        F - Flat       3        3        3      9 F - Forced Hot Air
## 3        F - Flat       5        3        3     13     S - Space Heat
## 4     M - Mansard       5        3        3     11 W - Ht Water/Steam
## 5     M - Mansard       6        3        2     13 W - Ht Water/Steam
## 6        F - Flat      13        6        3     20       E - Electric
## 7        F - Flat      14        5        3     20 W - Ht Water/Steam
## 8        F - Flat      11        3        3     16 W - Ht Water/Steam
## 9        F - Flat       5        3        3     14 W - Ht Water/Steam
## 10       F - Flat       6        3        3     14 W - Ht Water/Steam
##           AC_TYPE   PROP_VIEW
## 1        N - None A - Average
## 2  C - Central AC A - Average
## 3        N - None A - Average
## 4        N - None A - Average
## 5        N - None A - Average
## 6        N - None A - Average
## 7        N - None A - Average
## 8        N - None A - Average
## 9        N - None A - Average
## 10 C - Central AC A - Average

b) Checking data-type of variables

summary(property_subset)
##      CITY              ZIP_CODE         LU               LAND_SF         
##  Length:180627      Min.   :2026   Length:180627      Min.   :      100  
##  Class :character   1st Qu.:2119   Class :character   1st Qu.:     1000  
##  Mode  :character   Median :2127   Mode  :character   Median :     2014  
##                     Mean   :2130                      Mean   :     7816  
##                     3rd Qu.:2131                      3rd Qu.:     4770  
##                     Max.   :2467                      Max.   :101513565  
##                     NA's   :3                         NA's   :7545       
##    LAND_VALUE         TOTAL_VALUE           YR_BUILT       YR_REMODEL   
##  Min.   :        0   Min.   :0.000e+00   Min.   : 1700   Min.   :  201  
##  1st Qu.:        0   1st Qu.:3.796e+05   1st Qu.: 1900   1st Qu.: 1987  
##  Median :        0   Median :6.096e+05   Median : 1920   Median : 2004  
##  Mean   :   376579   Mean   :1.500e+06   Mean   : 1932   Mean   : 2001  
##  3rd Qu.:   225000   3rd Qu.:9.141e+05   3rd Qu.: 1964   3rd Qu.: 2015  
##  Max.   :486046900   Max.   :2.143e+09   Max.   :20198   Max.   :20220  
##                                          NA's   :22930   NA's   :95227  
##  ROOF_STRUCTURE        BED_RMS         FULL_BTH         KITCHENS    
##  Length:180627      Min.   : 0.00   Min.   : 0.000   Min.   :0.000  
##  Class :character   1st Qu.: 2.00   1st Qu.: 1.000   1st Qu.:1.000  
##  Mode  :character   Median : 3.00   Median : 1.000   Median :1.000  
##                     Mean   : 3.16   Mean   : 1.351   Mean   :1.053  
##                     3rd Qu.: 4.00   3rd Qu.: 2.000   3rd Qu.:1.000  
##                     Max.   :17.00   Max.   :17.000   Max.   :5.000  
##                     NA's   :48287   NA's   :11116    NA's   :11114  
##      TT_RMS       HEAT_TYPE           AC_TYPE           PROP_VIEW        
##  Min.   : 1.00   Length:180627      Length:180627      Length:180627     
##  1st Qu.: 4.00   Class :character   Class :character   Class :character  
##  Median : 6.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 6.97                                                           
##  3rd Qu.: 9.00                                                           
##  Max.   :20.00                                                           
##  NA's   :48354
  • All the variables have the correct data-type. We’ll treat the outliers and NA values as we visualise each attribute.

3.Visualisations

a) Histogram for distribution of Land Values

  • We cleaned the data using IQR() and also removed values == 0
iqr_land_value <- IQR(property_subset$LAND_VALUE)

# Define lower and upper bounds for outliers
lower_bound <- quantile(property_subset$LAND_VALUE)[2] - 1.5 * iqr_land_value
upper_bound <- quantile(property_subset$LAND_VALUE)[4] + 1.5 * iqr_land_value

# Remove outliers from LAND_VALUE
data_cleaned <- property_subset %>%
  filter(LAND_VALUE >= lower_bound, LAND_VALUE <= upper_bound)

data_cleaned <- data_cleaned %>%
  filter(LAND_VALUE != 0)

land_value <- subset(data_cleaned, select = LAND_VALUE)

gg_land_value <- ggplot(land_value, aes(x = LAND_VALUE)) +
  geom_histogram(fill = "blue", color = "black", binwidth=100000) +
  labs(
    title = "Histogram of Land Values",
    x = "Land Values in Dollars",
    y = "Frequency"
  ) +
  scale_x_continuous(labels = scales::comma) +  # Format x-axis labels with commas
  scale_y_continuous(labels = scales::comma)     # Format y-axis labels with commas

plotly_hist <- ggplotly(gg_land_value)
plotly_hist
  • We’ve plotted a histogram using ggplot() and plotly() and found that maximum Land Value is in the range
    $150,000-$250,000 with a count of 37,284.

b) Histogram for disrtibution of houses among different cities

# Creating a histogram for number of houses per city using ggplot
gg_hist <-ggplot(property_subset, aes(x = reorder(CITY, -table(CITY)[CITY]))) +
  geom_bar(fill = "blue", color = "black") +
  labs(
    title = "Number of houses per city ",
    x = "City",
    y = "Frequency of houses"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Using plotly for creating an interactive graph

plotly_hist <- ggplotly(gg_hist)
plotly_hist
  • The above histogram shows us that our data set has maximum number of houses from Boston with a count of 47,104 . We’ve used ggplot() and plotly().

c) Average land value per sqft

  • We’ll calculate the average land value per sqft.
average_land_value_per_sqft <- property_subset %>%
  group_by(CITY) %>%
  summarize(Avg_Land_Value_per_Sqft = sum(LAND_VALUE) / sum(LAND_SF, na.rm = TRUE))

#Using table() to check for blank values

table(property_subset$CITY)
## 
##                           ALLSTON           BOSTON         BRIGHTON 
##                5             4423            47104            11870 
##        BROOKLINE      CHARLESTOWN    CHESTNUT HILL           DEDHAM 
##               24             7230             1017                6 
##       DORCHESTER      EAST BOSTON        HYDE PARK    JAMAICA PLAIN 
##            29212             9951             9207            12104 
##         MATTAPAN           NEWTON        READVILLE       ROSLINDALE 
##             4847                1                2             9210 
##          ROXBURY ROXBURY CROSSING     SOUTH BOSTON     WEST ROXBURY 
##             6168             1832            15439            10975
  • We’ll remove these 5 blank values using subset().
average_land_value_per_sqft <- filter(average_land_value_per_sqft, CITY!="")

Bar chart for average land value per sqft by city

#Using ggplot to plot bar graph for Average land value per sq ft.
ggplot(average_land_value_per_sqft, aes(x = reorder(CITY,-Avg_Land_Value_per_Sqft), y = Avg_Land_Value_per_Sqft)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Average Land Value Per Sqft by City", x = "City", y = "Average Land Value Per Sqft")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+ 
  scale_y_continuous(labels = scales::dollar_format(prefix = "$"),limits = c(0,200))+
  geom_text(aes(label = round(Avg_Land_Value_per_Sqft, 2), vjust = -0.5), size = 3)

  • We’ve plotted average land value per sqft using ggplot() and found out that Boston has highest value at $169.56 per sqft

d) Most common roof structure used among different houses

table(property_subset$ROOF_STRUCTURE)
## 
##                F - Flat   G - Gable     H - Hip L - Gambrel M - Mansard 
##       36420       67348       46495       13996        2154       13523 
##   O - Other    S - Shed 
##         343         348
  • We see that there are lot of blank values, we’ll remove these blank values
# Removing blank values
roof_structure_cleaned <- property_subset%>%filter(ROOF_STRUCTURE != "")

Bar Chart for distribution of Roof Structure

# Plotting Bar chart using ggplot

ggplot(roof_structure_cleaned, aes(x = ROOF_STRUCTURE)) +
  geom_bar(fill = "lightblue") +
  labs(title = "Distribution of Roof Structure", x = "Roof Structure", y = "Count") + geom_text(
  aes(label = after_stat(count)), 
  stat = "count", 
  vjust = -0.5
)+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+ 
  scale_y_continuous(limits = c(0,80000))

* We’ve plotted a bar chart using ggplot() and observed that most used roof structure is F-Flat Roof structure followed by G- Gable Roof structure .

e) Distribution of number of houses built per year

  • Cleaning the YR_BUILT column
year_built <- subset(property_subset, select =YR_BUILT)

# Omitting NA values

year_built <- na.omit(year_built)

# Few values in this YR_BUILT column are 5 digit, so we filter them out

year_built <- filter(year_built, YR_BUILT <3000)

Histogram for year the houses were built

gg_yr_built <- ggplot(data = year_built, aes(x = YR_BUILT)) +
  geom_histogram(binwidth = 5
                 , fill = "lightblue", color = "black") +
  labs(title = "Distribution of number of house built by year", x = "Year Built", y = "Number of houses")+
  scale_x_continuous(limits = c(1600, 2050
                                ), breaks = seq(1600,2050, by=50)  )+
  scale_y_continuous(limits = c(0,30000))
plotly_hist <- ggplotly(gg_yr_built)
plotly_hist
  • We’ve plotted a histogram using plotly() to visualise number of houses built in different years. We observe that maximum houses were built in the year 1990 with a count of 28,253.

f) Distribution of number of houses remodeled by year

year_remodel <- subset(property_subset, select =YR_REMODEL)

# Omitting NA values

year_remodel <- na.omit(year_remodel)

# Few values in this YR_BUILT column are 5 digit, so we filter them out

year_remodel <- filter(year_remodel, YR_REMODEL <2030 & YR_REMODEL >1500)

Histogram for year the houses were remodeled

gg_yr_remodel <- ggplot(year_remodel, aes(x = YR_REMODEL)) +
  geom_histogram(binwidth = 5
                 , fill = "lightblue", color = "black") +
  labs(title = "Distribution of Year Remodeled", x = "Year Remodeled", y = "Frequency")+
  scale_x_continuous(limits = c(1900, 2050
                                ), breaks = seq(1900,2050, by=10)  )+
  scale_y_continuous(limits = c(0,30000))
plotly_hist <- ggplotly(gg_yr_remodel)
plotly_hist
  • We’ve plotted a histogram using ggplot() and plotly() and observed that maximum number of houses were remodeled in the year 2015 with a count of 13556.

g) Scatter Plot of Land Area vs Land Value

  • We have filtered out very high values from LAND_SF using subset() since it may affect our visualisation.
#Removing unusually high values and zero values from the LAND_SF and LAND_VALUE

land_area <- subset(property_dataset, LAND_SF !=0 & LAND_VALUE != 0 & LAND_SF <=95000000 & LAND_VALUE <= 400000000)

#Calculating correlation coefficient to understand the strength of relationship between x axis and y axis
correlation_coefficient <- cor(land_area$LAND_SF, land_area$LAND_VALUE)

# Plotting Scatter plot using ggplot
ggplot(land_area, aes(x = LAND_SF, y = LAND_VALUE)) +
  geom_point(size = 1, color = "blue") +
  labs(
    title = "Scatter Plot of Land Area vs Land Value",
    x = "Land Area (Sq. Feet)",
    y = "Land Value"
  ) +
  theme_minimal()+ scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma)+geom_text(aes(x = 1000000, y = 350000000, label = paste("Correlation =", round(correlation_coefficient, 2))))

  • We have plotted a scatter plot for Land Area vs Land Value using ggplot() and have obtained a correlation coefficient = 0.44 which indicates moderate positive linear relationship between the two variables. This also indicates that price of land may depend on other variables apart from land area.

h) Distribution of different AC-types per city

  • Using CrossTable() to visualise relation between Cities and AC types
ac_type <- gmodels::CrossTable(property_subset$CITY, property_subset$AC_TYPE,prop.t = FALSE, prop.c = FALSE, prop.chisq = FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  180627 
## 
##  
##                      | property_subset$AC_TYPE 
## property_subset$CITY |                 |  C - Central AC | D - Ductless AC |        N - None |       Row Total | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##                      |               3 |               2 |               0 |               0 |               5 | 
##                      |           0.600 |           0.400 |           0.000 |           0.000 |           0.000 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##              ALLSTON |            1031 |             834 |              21 |            2537 |            4423 | 
##                      |           0.233 |           0.189 |           0.005 |           0.574 |           0.024 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##               BOSTON |           15633 |           19368 |             216 |           11887 |           47104 | 
##                      |           0.332 |           0.411 |           0.005 |           0.252 |           0.261 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##             BRIGHTON |            1936 |            2565 |              62 |            7307 |           11870 | 
##                      |           0.163 |           0.216 |           0.005 |           0.616 |           0.066 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##            BROOKLINE |               8 |               2 |               0 |              14 |              24 | 
##                      |           0.333 |           0.083 |           0.000 |           0.583 |           0.000 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##          CHARLESTOWN |            1984 |            3244 |              69 |            1933 |            7230 | 
##                      |           0.274 |           0.449 |           0.010 |           0.267 |           0.040 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##        CHESTNUT HILL |              50 |             538 |               3 |             426 |            1017 | 
##                      |           0.049 |           0.529 |           0.003 |           0.419 |           0.006 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##               DEDHAM |               2 |               1 |               0 |               3 |               6 | 
##                      |           0.333 |           0.167 |           0.000 |           0.500 |           0.000 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##           DORCHESTER |            7276 |            4926 |             150 |           16860 |           29212 | 
##                      |           0.249 |           0.169 |           0.005 |           0.577 |           0.162 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##          EAST BOSTON |            2639 |            2929 |             108 |            4275 |            9951 | 
##                      |           0.265 |           0.294 |           0.011 |           0.430 |           0.055 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##            HYDE PARK |            1721 |            1488 |              32 |            5966 |            9207 | 
##                      |           0.187 |           0.162 |           0.003 |           0.648 |           0.051 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##        JAMAICA PLAIN |            2955 |            3282 |              91 |            5776 |           12104 | 
##                      |           0.244 |           0.271 |           0.008 |           0.477 |           0.067 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##             MATTAPAN |            1080 |             521 |              17 |            3229 |            4847 | 
##                      |           0.223 |           0.107 |           0.004 |           0.666 |           0.027 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##               NEWTON |               0 |               0 |               0 |               1 |               1 | 
##                      |           0.000 |           0.000 |           0.000 |           1.000 |           0.000 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##            READVILLE |               2 |               0 |               0 |               0 |               2 | 
##                      |           1.000 |           0.000 |           0.000 |           0.000 |           0.000 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##           ROSLINDALE |            1665 |            1791 |              79 |            5675 |            9210 | 
##                      |           0.181 |           0.194 |           0.009 |           0.616 |           0.051 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##              ROXBURY |            2560 |            1021 |              35 |            2552 |            6168 | 
##                      |           0.415 |           0.166 |           0.006 |           0.414 |           0.034 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##     ROXBURY CROSSING |             697 |             445 |               8 |             682 |            1832 | 
##                      |           0.380 |           0.243 |           0.004 |           0.372 |           0.010 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##         SOUTH BOSTON |            4493 |            6698 |              92 |            4156 |           15439 | 
##                      |           0.291 |           0.434 |           0.006 |           0.269 |           0.085 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##         WEST ROXBURY |            2124 |            2447 |             151 |            6253 |           10975 | 
##                      |           0.194 |           0.223 |           0.014 |           0.570 |           0.061 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##         Column Total |           47859 |           52102 |            1134 |           79532 |          180627 | 
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## 
## 
# We have reshaped the data into a long format using `melt()` to plot stacked bar graph.
ac_type_long <- melt(ac_type, id.vars ="AC_TYPE", value.name = "Count")

# Converting Count column to integer for better readability of the stacked bar chart
ac_type_long$Count <- as.integer(ac_type_long$Count)

# Filtering out blank values

ac_type_long <- filter(ac_type_long, y !="" & x !="")

Stacked Bar Chart for different AC types per city

#Using ggplot to plot stacked bar graph
gg_ac_type <- ggplot(ac_type_long, aes(x = x, y = Count, fill = y)) +
  geom_bar(stat = "identity") +
  labs(title = "Ac type counts by City", x = "City", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  scale_y_continuous(
    breaks = seq(0,30000, by=4000),
    limits = c(0,45000))
# Converting ggplot to plotly
plotly_plot <- ggplotly(gg_ac_type)


# Show the interactive plotly plot
plotly_plot
  • We’ve plotted a stacked bar chart using ggplot() and plotyly() and observed that maximum houses have C - Central AC and Boston has highest count of C - Central AC with a figure of 19,368. It can also be observed that D- Ductless AC is not very common among the cities in our dataset.

i) Distribution of heat types per city

  • Using CrossTable() to visualise relation between Cities and Heat types
heat_type <- gmodels::CrossTable(property_subset$CITY, property_subset$HEAT_TYPE,prop.t = FALSE, prop.c = FALSE, prop.chisq = FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  180627 
## 
##  
##                      | property_subset$HEAT_TYPE 
## property_subset$CITY |                    |       E - Electric | F - Forced Hot Air |           N - None |          O - Other |      P - Heat Pump |     S - Space Heat | W - Ht Water/Steam |          Row Total | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##                      |                  3 |                  0 |                  2 |                  0 |                  0 |                  0 |                  0 |                  0 |                  5 | 
##                      |              0.600 |              0.000 |              0.400 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##              ALLSTON |               1031 |                 68 |                973 |                  1 |                  1 |                 39 |                 16 |               2294 |               4423 | 
##                      |              0.233 |              0.015 |              0.220 |              0.000 |              0.000 |              0.009 |              0.004 |              0.519 |              0.024 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##               BOSTON |              15628 |               2562 |              12040 |                  4 |                  5 |               2960 |                 71 |              13834 |              47104 | 
##                      |              0.332 |              0.054 |              0.256 |              0.000 |              0.000 |              0.063 |              0.002 |              0.294 |              0.261 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##             BRIGHTON |               1937 |                281 |               2512 |                  2 |                  1 |                184 |                  6 |               6947 |              11870 | 
##                      |              0.163 |              0.024 |              0.212 |              0.000 |              0.000 |              0.016 |              0.001 |              0.585 |              0.066 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##            BROOKLINE |                  8 |                  0 |                  2 |                  0 |                  0 |                  0 |                  0 |                 14 |                 24 | 
##                      |              0.333 |              0.000 |              0.083 |              0.000 |              0.000 |              0.000 |              0.000 |              0.583 |              0.000 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##          CHARLESTOWN |               1983 |                259 |               2518 |                  0 |                  1 |                485 |                 23 |               1961 |               7230 | 
##                      |              0.274 |              0.036 |              0.348 |              0.000 |              0.000 |              0.067 |              0.003 |              0.271 |              0.040 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##        CHESTNUT HILL |                 50 |                  1 |                415 |                  0 |                  0 |                  1 |                  0 |                550 |               1017 | 
##                      |              0.049 |              0.001 |              0.408 |              0.000 |              0.000 |              0.001 |              0.000 |              0.541 |              0.006 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##               DEDHAM |                  2 |                  0 |                  1 |                  0 |                  0 |                  0 |                  0 |                  3 |                  6 | 
##                      |              0.333 |              0.000 |              0.167 |              0.000 |              0.000 |              0.000 |              0.000 |              0.500 |              0.000 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##           DORCHESTER |               7273 |                654 |               8271 |                 27 |                  3 |                115 |                 55 |              12814 |              29212 | 
##                      |              0.249 |              0.022 |              0.283 |              0.001 |              0.000 |              0.004 |              0.002 |              0.439 |              0.162 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##          EAST BOSTON |               2639 |                455 |               3086 |                 14 |                  1 |                 62 |                261 |               3433 |               9951 | 
##                      |              0.265 |              0.046 |              0.310 |              0.001 |              0.000 |              0.006 |              0.026 |              0.345 |              0.055 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##            HYDE PARK |               1721 |                259 |               2749 |                 10 |                  5 |                 18 |                 12 |               4433 |               9207 | 
##                      |              0.187 |              0.028 |              0.299 |              0.001 |              0.001 |              0.002 |              0.001 |              0.481 |              0.051 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##        JAMAICA PLAIN |               2954 |                256 |               3379 |                  4 |                  7 |                213 |                 22 |               5269 |              12104 | 
##                      |              0.244 |              0.021 |              0.279 |              0.000 |              0.001 |              0.018 |              0.002 |              0.435 |              0.067 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##             MATTAPAN |               1080 |                 93 |               1269 |                  6 |                  0 |                  1 |                  6 |               2392 |               4847 | 
##                      |              0.223 |              0.019 |              0.262 |              0.001 |              0.000 |              0.000 |              0.001 |              0.494 |              0.027 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##               NEWTON |                  0 |                  0 |                  0 |                  0 |                  0 |                  0 |                  0 |                  1 |                  1 | 
##                      |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              1.000 |              0.000 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##            READVILLE |                  2 |                  0 |                  0 |                  0 |                  0 |                  0 |                  0 |                  0 |                  2 | 
##                      |              1.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 |              0.000 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##           ROSLINDALE |               1661 |                196 |               2322 |                  4 |                  2 |                148 |                  1 |               4876 |               9210 | 
##                      |              0.180 |              0.021 |              0.252 |              0.000 |              0.000 |              0.016 |              0.000 |              0.529 |              0.051 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##              ROXBURY |               2560 |                181 |               1490 |                  2 |                  2 |                 34 |                 25 |               1874 |               6168 | 
##                      |              0.415 |              0.029 |              0.242 |              0.000 |              0.000 |              0.006 |              0.004 |              0.304 |              0.034 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##     ROXBURY CROSSING |                697 |                 61 |                343 |                  1 |                  0 |                134 |                  5 |                591 |               1832 | 
##                      |              0.380 |              0.033 |              0.187 |              0.001 |              0.000 |              0.073 |              0.003 |              0.323 |              0.010 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         SOUTH BOSTON |               4492 |                465 |               6429 |                  1 |                 10 |                101 |                171 |               3770 |              15439 | 
##                      |              0.291 |              0.030 |              0.416 |              0.000 |              0.001 |              0.007 |              0.011 |              0.244 |              0.085 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         WEST ROXBURY |               2124 |                112 |               2000 |                 13 |                  3 |                124 |                  3 |               6596 |              10975 | 
##                      |              0.194 |              0.010 |              0.182 |              0.001 |              0.000 |              0.011 |              0.000 |              0.601 |              0.061 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##         Column Total |              47845 |               5903 |              49801 |                 89 |                 41 |               4619 |                677 |              71652 |             180627 | 
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## 
## 
# We have reshaped the data into a long format using `melt()` to plot stacked bar graph.
heat_type_long <- melt(heat_type, id.vars ="HEAT_TYPE", value.name = "Count")

# Converting Count column to integer for better readability of the stacked bar chart
heat_type_long$Count <- as.integer(heat_type_long$Count)

# Filtering out blank values

heat_type_long <- filter(heat_type_long, y !="" & x !="")

Stacked Bar Chart for different Heat types per city

#Using ggplot to plot stacked bar graph
gg_heat_type <- ggplot(heat_type_long, aes(x = x, y = Count, fill = y)) +
  geom_bar(stat = "identity") +
  labs(title = "Heat type counts by City", x = "City", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  scale_y_continuous(
    breaks = seq(0,40000, by=5000),
    limits = c(0,45000))
# Converting ggplot to plotly
plotly_plot <- ggplotly(gg_heat_type)


# Show the interactive plotly plot
plotly_plot
  • We’ve plotted a stacked bar chart using ggplot() and plotyly() and observed that most common heating types are F - Forced Hot Air and W - Ht Water/Steam.